* Session setup for the Bartik construction script.
* Reset the Stata session before building anything.
clear all
* Seed the random-number generator from the caller-supplied global.
set seed ${seed}
* Reserve a temporary file name in the local macro tempsave.
tempfile tempsave


/* Bartik instrument (Shenhav 2020) */
/* Component 1: base-year (1970) industry shares within
   Gender x Education x Race-Ethnicity x Census Region cells. */
use ind sex educ reth cregion empl_1970_iserc ///
	using "${temp}base_iserc.dta", clear
merge m:1 sex educ reth cregion ///
	using "${temp}base_serc.dta", ///
	nogenerate keepusing(empl_1970_serc)

* Industry employment divided by total employment of the same cell.
generate w_1970_i = empl_1970_iserc / empl_1970_serc

* One copy of every row per year; the first copy is labelled 1976.
expand ${size_year}
bysort ind sex educ reth cregion: generate year = 1975 + _n

* Visual check only: sum of the shares within each cell and year
* (expected to be 1 when every row found its cell total).
gcollapse (sum) check=w_1970_i, by(year sex educ reth cregion) merge
summarize check, detail
drop check

save "${temp}b1_1.dta", replace

/* Component 2: base-year (1970) occupation shares within industry x
   Gender x Education x Race-Ethnicity cells, at the national level. */
use ind occ sex educ reth empl_1970_ioser ///
	using "${temp}base_ioser.dta", clear
merge m:1 ind sex educ reth ///
	using "${temp}base_iser.dta", ///
	nogenerate keepusing(empl_1970_iser)

* Occupation employment divided by total employment of the industry cell.
generate w_1970_io = empl_1970_ioser / empl_1970_iser

* The shares carry no region or year dimension, so replicate them:
* first once per region (numbered 1, 2, ... within cell), then once
* per year (numbered from 1976 within cell and region).
expand ${size_cregion}
bysort ind occ sex educ reth: generate cregion = _n
expand ${size_year}
bysort ind occ sex educ reth cregion: generate year = 1975 + _n

save "${temp}b1_2.dta", replace

/* Component 3: annual within-industry occupation growth, relative to
   overall occupation growth and to the base year, at the (jackknife)
   national level. */
use ind occ cregion year empl_io* ///
	using "${temp}dev_io.dta", clear

* Identifier(s) of each aggregate, used in addition to cregion
* (and year for the annual files). The total has none.
local key_io  ind occ
local key_o   occ
local key_i   ind
local key_tot

* Annual employment by occupation, by industry, and in total.
foreach agg in o i tot {
	merge m:1 `key_`agg'' cregion year ///
		using "${temp}dev_`agg'.dta", ///
		nogenerate keepusing(empl_`agg'*)
}

* Base-year (1970) employment for all four aggregates.
foreach agg in io o i tot {
	merge m:1 `key_`agg'' cregion ///
		using "${temp}base_`agg'.dta", ///
		nogenerate keepusing(empl_1970_`agg'*)
}

* Growth factor relative to 1970; one is added to numerator and
* denominator so the ratio is defined for every cell.
foreach agg in io o i tot {
	generate empl_`agg'_b = (empl_`agg' + 1) / (empl_1970_`agg' + 1)
}

* Growth of the occupation within its industry, divided by growth of
* the occupation within total employment.
generate w_t_io = (empl_io_b / empl_i_b) / (empl_o_b / empl_tot_b)

save "${temp}b1_3.dta", replace

/* Component 4: annual wages and employment levels in
   Industry x Occupation cells at the (jackknife) national level. */

* Start from every variable of the industry x occupation file whose
* name ends in _io.
use ind occ cregion year *_io ///
	using "${temp}dev_io.dta", clear

* Bring in all variables of the industry-level file.
merge m:1 ind cregion year ///
	using "${temp}dev_i.dta", nogenerate

* Bring in total employment only.
merge m:1 cregion year ///
	using "${temp}dev_tot.dta", ///
	nogenerate keepusing(empl_tot)

save "${temp}b1_4.dta", replace

/* Consolidate the four components. */

/* Step 1: annual predicted within-industry wages in
   Gender x Education x Race-Ethnicity x Census Region cells. */
use ind occ cregion year *_io *_i ///
	using "${temp}b1_4.dta", clear
* Relative occupation growth (component 3).
merge 1:1 ind occ cregion year ///
	using "${temp}b1_3.dta", ///
	nogenerate keepusing(w_t_io)
* Base-year occupation shares (component 2); this fans each
* industry x occupation row out over the demographic cells.
merge 1:m ind occ cregion year ///
	using "${temp}b1_2.dta", ///
	nogenerate keepusing(sex educ reth w_1970_io)
* Base-year industry shares (component 1).
merge m:1 ind sex educ reth cregion year ///
	using "${temp}b1_1.dta", ///
	nogenerate keepusing(w_1970_i)

* Updated occupation weights, rescaled to sum to one within each
* industry x demographic cell x region x year.
gegen w_o_norm = sum(w_1970_io*w_t_io), by(ind sex reth educ cregion year)
generate w_o = w_1970_io*w_t_io/w_o_norm

* Version 1: including the updating term (Shenhav 2020).
* Average over occupations with the updated weights; w_1970_i is
* listed in by() so that it is carried through the collapse.
gcollapse (mean) wage* [aw=w_o], by(ind sex educ reth cregion year w_1970_i)
rename (wage*) (wage*_1)

/* Step 2: annual predicted wages in
   Gender x Education x Race-Ethnicity x Region cells, averaging over
   industries with the base-year industry shares. */
gcollapse (mean) wage* [aw=w_1970_i], by(sex educ reth cregion year)

* Only the series built from the industry x occupation variables is kept.
drop wage_i_1

* Rename every remaining NAME_io_1 variable to b_NAME.
foreach v of varlist *_io_1 {
	local stem = subinstr("`v'", "_io_1", "", 1)
	rename `v' b_`stem'
}

** Imputation 1974 1975
* The series starts in 1976. For each earlier year, copy the 1976 rows,
* relabel them with that year, blank out b_wage, and append them. A
* linear trend model is then used to fill the blanked values.
*
* The stub rows go through the tempfile declared at the top of this
* do-file (local macro tempsave). The macro must be dereferenced:
* a literal "tempsave" would write a permanent tempsave.dta into the
* working directory instead of using the temporary file.
forvalues i = 1974/1975 {
	preserve
	keep if year == 1976
	replace year = `i'
	replace b_wage = .
	save "`tempsave'", replace
	restore
	append using "`tempsave'"
}

* Linear time trend with separate intercepts and slopes by sex,
* education, race-ethnicity and region, estimated on the rows with a
* non-missing b_wage (the appended stub rows are missing and drop out).
regress b_wage c.year##(i.sex i.educ i.reth i.cregion)

* Use the fitted values only for the imputed years.
predict w_hat if year < 1976
replace b_wage = w_hat if !missing(w_hat)

/* Restrict and save */
keep sex educ reth cregion year b_wage
save "${temp}bartik.dta", replace
